## Load libraries
suppressPackageStartupMessages({
library(ArchR)
library(rhdf5)
library(tidyverse)
library(reticulate)
library(zellkonverter)
library(Matrix)
library(dichromat)
library(Seurat)
#library(caret)
h5disableFileLocking()})
rna_seurat <- readRDS("Seurat_objects/rna_Seurat_object")
hvg <- VariableFeatures(rna_seurat)
# directory where to save the figures
plot_dir <- "/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/plots/report_chromvar/"
# get gene expression matrix
proj <- loadArchRProject("12_Ricards_peaks_ChromVar")
# get the metadata
metadata <- as.data.frame(getCellColData(proj))
# get motif matrix
motifs <- getMatrixFromProject(proj, useMatrix = "MotifMatrix")
motif_mtx <- assays(motifs)[[2]]
# remove index number from TFs
tfs <- gsub("_.*", "", rownames(motifs))
rownames(motif_mtx) <- tfs
gene_scores <- getMatrixFromProject(proj, useMatrix = "GeneScoreMatrix")
gene_scores_mat <- assays(gene_scores)[[1]]
rownames(gene_scores_mat) <- rowData(gene_scores)$name
colnames(gene_scores_mat) <- colnames(gene_scores)
proj1 <- loadArchRProject("/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/06_deep_chromvar/")
# get motif matrix
deep_motifs <- getMatrixFromProject(proj1, useMatrix = "DeepLearningMotifs1")
deep_motif_mtx <- assays(deep_motifs)[[2]]
rownames(deep_motif_mtx) <- tolower(rownames(deep_motif_mtx))
substr(rownames(deep_motif_mtx), 1, 1) <- toupper(substr(rownames(deep_motif_mtx), 1, 1))
tfs <- rownames(deep_motif_mtx)
metadata <- colData(deep_motifs) %>% as.data.frame()
plot_score_per_celltypes <- function(tf, score_matrix, metadata_df, y_label){
motif_n <- score_matrix[rownames(score_matrix) %in% tf, ]
plot <- metadata_df %>%
mutate(!!tf := motif_n) %>%
group_by(celltypes) %>%
summarise(mean = mean(!!(sym(tf)))) %>%
ggplot() +
geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
labs(y = paste0(y_label),
title = paste0(n)) +
BAR_THEME
return(plot)
}
scatterplot <- function(tf, score_matrix_x, score_matrix_y, metadata_df, x_label, y_label){
motif_x <- score_matrix_x[rownames(score_matrix_x) %in% tf, ]
motif_y <- score_matrix_y[rownames(score_matrix_y) %in% tf, ]
plot <- metadata_df %>%
mutate(tf_x := motif_x, tf_y := motif_y) %>%
group_by(celltypes) %>%
summarise_at(vars(tf_x, tf_y), mean) %>%
ggplot() +
geom_smooth(aes(x = tf_x, y = tf_y),
formula = y ~ x, method = "lm", size =.1) +
geom_point(aes(x = tf_x, y = tf_y, col = celltypes, size = 1)) +
scale_color_manual(values = col) +
labs(title = paste0(tf),
x = paste0(x_label),
y = paste0(y_label)) +
SCATTER_THEME
return(plot)
}
BAR_THEME <- theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 10),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
plot.title = element_text(hjust = 0.5, size = 20),
legend.position = "none",
panel.grid.major = element_line(colour = "grey"), # Major grid lines
panel.background = element_rect(fill = "white", colour = "black"))
SCATTER_THEME <- theme(axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
plot.title = element_text(hjust = 0.5, size = 20),
legend.position = "none",
panel.grid.major = element_line(colour = "grey"), # Major grid lines
panel.background = element_rect(fill = "white", colour = "black"))
colPalette_celltypes = c('#532C8A',
'#c19f70',
'#f9decf',
'#c9a997',
'#B51D8D',
'#3F84AA',
'#9e6762',
'#354E23',
'#F397C0',
'#ff891c',
'#635547',
'#C72228',
'#f79083',
'#EF4E22',
'#989898',
'#7F6874',
'#8870ad',
'#647a4f',
'#EF5A9D',
'#FBBE92',
'#139992',
'#cc7818',
'#DFCDE4',
'#8EC792',
'#C594BF',
'#C3C388',
'#0F4A9C',
'#FACB12',
'#8DB5CE',
'#1A1A1A',
'#C9EBFB',
'#DABE99',
'#65A83E',
'#005579',
'#CDE088',
'#f7f79e',
'#F6BFCB')
celltypes <- (as.data.frame(getCellColData(proj)) %>% group_by(celltypes) %>%
summarise(n = n()))$celltypes
col <- setNames(colPalette_celltypes, celltypes)
Select a few marker genes and transcription factors:
marker_genes <- c("Lamb1", "Sparc", "Elf5", "Ascl2", "Tfap2c", "Ttr",
"Apoa2", "Apoe", "Cystm1", "Emb", "Spink1", "Krt19",
"Dkk1", "Grhl3", "Trp63", "Grhl2", "Pax6", "Pax2",
"En1", "Foxd3", "Tfap2a", "Pax3", "Sox9",
"Six3", "Hesx1", "Irx3", "Hoxb9", "Cdx4",
"Hes3", "Hba-a2", "Hba-a1", "Hbb-bh1", "Gata1",
"Gata6",
#"Gata6", "Gata5",
"Cited4",
"Cdh5", "Pecam1", "Anxa5", "Etv2", "Igf2",
"Krt8", "Krt18", "Pmp22", "Ahnak", "Bmp4", "Tbx4", "Hoxa11",
"Hoxa10", "Tnnt2", "Myl4", "Myl7", "Acta2",
"Smarcd3", "Tcf21", "Tbx6", "Dll1", "Aldh1a2", "Tcf15",
"Meox1", "Tbx1", "Gbx2", "Cdx1", "Hoxb1", "Hes7", "Osr1",
"Mesp2", "Lefty2", "Mesp1", "Cer1", "Chrd",
"Foxa2", "Pax7", "Fgf8", "Lhx1", "Mixl1", "Otx2", "Hhex",
"Ifitm3", "Nkx1-2", "Eomes", "Nanog", "Utf1",
"Epcam", "Pou5f1" )
#"Sox2"
#"Gata2"
#"Gata4"
# "Gata5",
#"Gata6",
# "Gsc",
markers <- c("Elf5", "Pax2", "Pax6", "Pax3", "Pax7", "Hoxb9", "Cdx4", "Hoxa11", "Hoxa10", "Tcf15",
"Tbx1", "Tbx6", "Mesp2", "Mesp1", "Pouf51", "Gata1", "Gata2", "Gata3", "Gata4", "Gata6", "Sox10",
"Sox11" ,"Sox13","Sox15","Sox17"
,"Sox2" , "Sox3", "Sox30", "Sox4", "Sox5", "Sox6", "Sox9", "Klf1", "Klf3", "Klf4", "Klf5", "Klf9" )
# select only markers present in all three matrices
markers <- markers[markers %in% rownames(deep_motif_mtx)]
markers <- markers[markers %in% rownames(motif_mtx)]
markers <- markers[markers %in% rownames(gene_scores_mat)]
for (n in markers){
## BAR PLOTS
score_plot <- plot_score_per_celltypes(n, gene_scores_mat, metadata,
y_label = "Gene activity score")
ggsave(paste0(plot_dir, "score", n, ".pdf"))
motif_plot <- plot_score_per_celltypes(n, motif_mtx, metadata,
y_label = "ChromVar scores")
ggsave(paste0(plot_dir, "motif", n, ".pdf"))
deep_plot <- plot_score_per_celltypes(n, deep_motif_mtx, metadata,
y_label = "Deep learning ChromVar scores")
ggsave(paste0(plot_dir, "deep", n, ".pdf"), deep_plot)
## SCATTER PLOTS
scatter_motif <- scatterplot(n, gene_scores_mat, motif_mtx, metadata,
x_label = "Gene activity scores",
y_label = "ChromVar scores")
ggsave(paste0(plot_dir, "scatter_motif", n, ".pdf"), scatter_motif)
scatter_deep <- scatterplot(n, gene_scores_mat, deep_motif_mtx, metadata,
x_label = "Gene activity scores",
y_label = "Deep learning ChromVar scores")
ggsave(paste0(plot_dir, "scatter_deep", n, ".pdf"), scatter_deep)
## Combine Plots
print(cowplot::plot_grid(score_plot, motif_plot, deep_plot,
scatter_motif, scatter_deep, ncol = 3))
}
markers <- c("Lamb1", "Sparc", "Elf5", "Ascl2", "Tfap2c", "Ttr",
"Apoa2", "Apoe", "Cystm1", "Emb", "Spink1", "Krt19",
"Dkk1", "Grhl3", "Trp63", "Grhl2", "Pax6", "Pax2",
"En1", "Foxd3", "Tfap2a", "Pax3", "Sox9",
"Six3", "Hesx1", "Irx3", "Hoxb9", "Cdx4",
"Hes3", "Hba-a2", "Hba-a1", "Hbb-bh1", "Gata1",
"Gata6",
#"Gata6", "Gata5",
"Cited4",
"Cdh5", "Pecam1", "Anxa5", "Etv2", "Igf2",
"Krt8", "Krt18", "Pmp22", "Ahnak", "Bmp4", "Tbx4", "Hoxa11",
"Hoxa10", "Tnnt2", "Myl4", "Myl7", "Acta2",
"Smarcd3", "Tcf21", "Tbx6", "Dll1", "Aldh1a2", "Tcf15",
"Meox1", "Tbx1", "Gbx2", "Cdx1", "Hoxb1", "Hes7", "Osr1",
"Mesp2", "Lefty2", "Mesp1", "Cer1", "Chrd",
"Foxa2", "Pax7", "Fgf8", "Lhx1", "Mixl1", "Otx2", "Hhex",
"Ifitm3", "Nkx1-2", "Eomes", "Nanog", "Utf1",
"Epcam", "Pou5f1" )
# select only markers prsent in both matrices
markers <- markers[markers %in% rownames(motif_mtx)]
markers <- markers[markers %in% rownames(gene_scores_mat)]
for (n in markers){
## BAR PLOTS
score_plot <- plot_score_per_celltypes(n, gene_scores_mat, metadata,
y_label = "Gene activity score")
motif_plot <- plot_score_per_celltypes(n, motif_mtx, metadata,
y_label = "ChromVar scores")
## SCATTER PLOTS
scatter_motif <- scatterplot(n, gene_scores_mat, motif_mtx, metadata,
x_label = "Gene activity scores",
y_label = "ChromVar scores")
## Combine Plots
print(cowplot::plot_grid(score_plot, motif_plot,
scatter_motif, ncol = 2))
}
```#{r, fig.width=6, fig.height=6} for (n in c(“Gata1”, “Gata2”, “Gata3”, “Gata4”, “Gata5”, “Gata6”)) { print(n) # select one row for a particular gene score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% n, ] # add score for this gene to metadata metadata[paste0(“score_”,n)] <- score_n # select gene expression for a particular gene expr_n <- lognorm[rownames(lognorm) %in% c(n), ] metadata[paste0(“expr_”, n)] <- expr_n
seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] sea_meta[paste0(“seacell_”, n)] <- seacells_n
# select motif z score motif_n <- motif_mtx[rownames(motif_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] metadata[paste0(“motif_”, n)] <- motif_n
# create barplots for gene scores, gene expression and motif z-score plots <- map(c(“score_”, “motif_”), function(p){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE))) plot <- df %>% ggplot() + geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), fill = celltypes), stat = “identity”) + scale_fill_manual(values = col) + labs(title = paste0(n), x = “celltype”, y = paste0(p)) + BAR_THEME print(plot) ggsave(paste0(plot_dir, p, n, “.pdf”)) print(plot) }) sea_plot <- map(seq.int(1), function(sea){ plot <- sea_meta %>% group_by(celltypes) %>% summarise(mean = mean(!!sym(paste0(“seacell_”,n)))) %>% ggplot() + geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = “identity”) + scale_fill_manual(values = col) + labs(title = paste0(n), y = “SEACell deviation score”) + BAR_THEME print(plot) ggsave(paste0(plot_dir, “seacell_bar_”, n, “.pdf”)) print(plot)
}) # create scatter plots for gene expression and motif z-score scatter_plots <- map(seq.int(1), function(s){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(“motif_”, n), paste0(“score_”, n)), funs(mean(., na.rm = TRUE))) plot <- df %>% ggplot() + geom_smooth(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“motif_”, n))), formula = y ~ x, method = “lm”, size = .1) + geom_point(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“motif_”, n)), col = celltypes, size = 1)) + SCATTER_THEME + #labs(x = “Gata1 gene expression”, y = “Gata1 motif accessibility (z-score)”) + scale_color_manual(values = col) + labs(title = paste0(n), x = “gene activity score”, y = “TF-motif z-score”) print(plot) ggsave(paste0(plot_dir, “scatterPlot_”, n, “.pdf”)) print(plot) })
do.call(gridExtra::grid.arrange, c(plots, sea_plot, scatter_plots, ncol = 2)) #%>% ggsave(paste0(plot_dir, n, “.pdf”))
}
# Marker Genes
## SEACells
```#{r, fig.width=6, fig.height=6, eval = FALSE}
for (n in marker_genes) {
print(n)
# select one row for a particular gene
score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ]
# add score for this gene to metadata
metadata[paste0("score_",n)] <- score_n
# select gene expression for a particular gene
expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
metadata[paste0("expr_", n)] <- expr_n
seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
sea_meta[paste0("seacell_", n)] <- seacells_n
# if the marker gene is a TF
if (length(tfs[grepl(paste0("^", n), tfs)]) > 0) {
# select motif z score
motif_n <- motif_mtx[rownames(motif_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
metadata[paste0("motif_", n)] <- motif_n
# create barplots for gene scores, gene expression and motif z-score
plot <- map(c("score_", "motif_"), function(p){
df <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
plot <- df %>% ggplot() +
geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)),
fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none") +
labs(title = paste0(n), x = "celltype", y = paste0(p)) +
BAR_THEME
print(plot)
ggsave(paste0(plot_dir, p, n, ".pdf"))
print(plot)
})
# create scatter plots for gene expression and motif z-score
scatter_plots <- map(seq.int(1), function(s){
df <- metadata %>% group_by(celltypes) %>%
summarise_at(vars(paste0("motif_", n),
paste0("score_", n)),
funs(mean(., na.rm = TRUE)))
plot <- df %>%
ggplot() +
geom_smooth(aes(x = df %>% pull(paste0("score_", n)),
y = df %>% pull(paste0("motif_", n))),
formula = y ~ x, method = "lm", size = .1) +
geom_point(aes(x = df %>% pull(paste0("score_", n)),
y = df %>% pull(paste0("motif_", n)),
col = celltypes, size = 1)) +
#labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
theme(legend.position = "none") +
scale_color_manual(values = col) +
labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score") +
BAR_THEME
print(plot)
ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
print(plot)
})
sea_plot <- map(seq.int(1), function(sea){
plot <- sea_meta %>%
group_by(celltypes) %>%
summarise(mean = mean(!!sym(paste0("seacell_",n)))) %>%
ggplot() +
geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "None") +#%>% print()
labs(title = paste0(n), y = "SEACell deviation score") +
BAR_THEME
print(plot)
ggsave(paste0(plot_dir, "seacell_bar_", n, ".pdf"))
print(plot)
})
# if the marker gene is no TF
} else {
print("no")
# # create barplots for gene scores, gene expression and motif z-score
# plots <- map(c("score_"), function(p){
# df <- metadata %>%
# group_by(celltypes) %>%
# summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
# plot <- df %>% ggplot() +
# geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)),
# fill = celltypes), stat = "identity") +
# scale_fill_manual(values = col) +
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
# legend.position = "none") +
# labs(title = paste0(n), x = "celltype", y = paste0(p)) +
# BAR_THEME
# print(plot)
# ggsave(paste0(plot_dir, p, n, ".pdf"))
# print(plot)
#
# })
#
# # create scatter plots for gene expression and gene_score
# scatter_plots <- map(seq.int(1), function(s){
# df <- metadata %>% group_by(celltypes) %>%
# summarise_at(vars(paste0("score_", n),
# paste0("expr_", n)),
# funs(mean(., na.rm = TRUE)))
# plot <- df %>%
# ggplot() +
# geom_smooth(aes(x = df %>% pull(paste0("expr_", n)),
# y = df %>% pull(paste0("score_", n))),
# formula = y ~ x, method = "lm", size = .1) +
# geom_point(aes(x = df %>% pull(paste0("expr_", n)),
# y = df %>% pull(paste0("score_", n)),
# col = celltypes, size = 1)) +
# #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
# theme(legend.position = "none") +
# scale_color_manual(values = col)+
# labs(title = paste0(n), x = "gene expression", y = "gene activity score") +
# BAR_THEME
# print(plot)
# ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
# print(plot)
#})
}
#do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2))
#
}
proj1 <- loadArchRProject("/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/06_deep_chromvar/")
deep <- getMatrixFromProject(proj1, useMatrix = "DeepLearningMotifs1")
deep_matrix <- assays(deep)[[2]]
deep_matrix <- deep_matrix[, colnames(motif_mtx)]
stopifnot(all(colnames(motif_mtx) == colnames(deep_matrix)))
#test <- rownames(deep_matrix)
rownames(deep_matrix) <- tolower(rownames(deep_matrix))
substr(rownames(deep_matrix), 1, 1) <- toupper(substr(rownames(deep_matrix), 1, 1))
markers <- c("Elf5", "Pax2", "Pax6", "Pax3", "Pax7", "Hoxb9", "Cdx4", "Hoxa11", "Hoxa10", "Tcf15",
"Tbx1", "Tbx6", "Mesp2", "Mesp1", "Pouf51", "Gata1", "Gata2", "Gata3", "Gata4", "Gata6", "Sox10",
"Sox11" ,"Sox13","Sox15","Sox17"
,"Sox2" , "Sox3", "Sox30", "Sox4", "Sox5", "Sox6", "Sox9", "Klf1", "Klf3", "Klf4", "Klf5", "Klf9" )
markers <- markers[markers %in% rownames(deep_matrix)]
markers <- markers[markers %in% rownames(gene_scores_mat)]
markers
```#{r, fig.width=6, fig.height=6} plot_dir <- “/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/plots/deep_ChromVar/”
for (n in markers) { print(n) # select one row for a particular gene score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% n, ] # add score for this gene to metadata metadata[paste0(“score_”,n)] <- score_n # select gene expression for a particular gene # expr_n <- lognorm[rownames(lognorm) %in% c(n), ] # metadata[paste0(“expr_”, n)] <- expr_n
# seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] # sea_meta[paste0(“seacell_”, n)] <- seacells_n
# select motif z score deep_n <- deep_matrix[rownames(deep_matrix) %in% n, ] metadata[paste0(“deep_”, n)] <- deep_n
# create barplots for gene scores, gene expression and motif z-score plots <- map(c(“score_”, “deep_”), function(p){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE))) plot <- df %>% ggplot() + geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), fill = celltypes), stat = “identity”) + scale_fill_manual(values = col) + labs(title = paste0(n), x = “celltype”, y = paste0(p)) + BAR_THEME print(plot) ggsave(paste0(plot_dir, p, n, “.pdf”)) print(plot) }) scatter_plots <- map(seq.int(1), function(s){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(“deep_”, n), paste0(“score_”, n)), funs(mean(., na.rm = TRUE))) plot <- df %>% ggplot() + geom_smooth(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“deep_”, n))), formula = y ~ x, method = “lm”, size = .1) + geom_point(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“deep_”, n)), col = celltypes, size = 1)) + SCATTER_THEME + #labs(x = “Gata1 gene expression”, y = “Gata1 motif accessibility (z-score)”) + scale_color_manual(values = col) + labs(title = paste0(n), x = “gene activity score”, y = “TF-motif z-score (deep learning)”) print(plot) ggsave(paste0(plot_dir, “scatterPlot_”, n, “.pdf”)) print(plot) })
do.call(gridExtra::grid.arrange, c(plots, sea_plot, scatter_plots, ncol = 2)) #%>% ggsave(paste0(plot_dir, n, “.pdf”))
}
```#{r}
for (n in marker_genes) {
print(n)
# select one row for a particular gene
score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ]
# add score for this gene to metadata
metadata[paste0("score_",n)] <- score_n
# select gene expression for a particular gene
expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
metadata[paste0("expr_", n)] <- expr_n
seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
sea_meta[paste0("seacell_", n)] <- seacells_n
# if the marker gene is a TF
if (length(tfs[grepl(paste0("^", n), tfs)]) > 0) {
# select motif z score
motif_n <- motif_mtx[rownames(motif_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
metadata[paste0("motif_", n)] <- motif_n
# create barplots for gene scores, gene expression and motif z-score
plot <- map(c("score_", "motif_"), function(p){
df <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
plot <- df %>% ggplot() +
geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)),
fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none") +
labs(title = paste0(n), x = "celltype", y = paste0(p)) +
BAR_THEME
print(plot)
ggsave(paste0(plot_dir, p, n, ".pdf"))
print(plot)
})
# create scatter plots for gene expression and motif z-score
scatter_plots <- map(seq.int(1), function(s){
df <- metadata %>% group_by(celltypes) %>%
summarise_at(vars(paste0("motif_", n),
paste0("score_", n)),
funs(mean(., na.rm = TRUE)))
plot <- df %>%
ggplot() +
geom_smooth(aes(x = df %>% pull(paste0("score_", n)),
y = df %>% pull(paste0("motif_", n))),
formula = y ~ x, method = "lm", size = .1) +
geom_point(aes(x = df %>% pull(paste0("score_", n)),
y = df %>% pull(paste0("motif_", n)),
col = celltypes, size = 1)) +
#labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
theme(legend.position = "none") +
scale_color_manual(values = col) +
labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score") +
BAR_THEME
print(plot)
ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
print(plot)
})
sea_plot <- map(seq.int(1), function(sea){
plot <- sea_meta %>%
group_by(celltypes) %>%
summarise(mean = mean(!!sym(paste0("seacell_",n)))) %>%
ggplot() +
geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "None") +#%>% print()
labs(title = paste0(n), y = "SEACell deviation score") +
BAR_THEME
print(plot)
ggsave(paste0(plot_dir, "seacell_bar_", n, ".pdf"))
print(plot)
})
# if the marker gene is no TF
} else {
print("no")
# # create barplots for gene scores, gene expression and motif z-score
# plots <- map(c("score_"), function(p){
# df <- metadata %>%
# group_by(celltypes) %>%
# summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
# plot <- df %>% ggplot() +
# geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)),
# fill = celltypes), stat = "identity") +
# scale_fill_manual(values = col) +
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
# legend.position = "none") +
# labs(title = paste0(n), x = "celltype", y = paste0(p)) +
# BAR_THEME
# print(plot)
# ggsave(paste0(plot_dir, p, n, ".pdf"))
# print(plot)
#
# })
#
# # create scatter plots for gene expression and gene_score
# scatter_plots <- map(seq.int(1), function(s){
# df <- metadata %>% group_by(celltypes) %>%
# summarise_at(vars(paste0("score_", n),
# paste0("expr_", n)),
# funs(mean(., na.rm = TRUE)))
# plot <- df %>%
# ggplot() +
# geom_smooth(aes(x = df %>% pull(paste0("expr_", n)),
# y = df %>% pull(paste0("score_", n))),
# formula = y ~ x, method = "lm", size = .1) +
# geom_point(aes(x = df %>% pull(paste0("expr_", n)),
# y = df %>% pull(paste0("score_", n)),
# col = celltypes, size = 1)) +
# #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
# theme(legend.position = "none") +
# scale_color_manual(values = col)+
# labs(title = paste0(n), x = "gene expression", y = "gene activity score") +
# BAR_THEME
# print(plot)
# ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
# print(plot)
#})
}
#do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2))
#
}
Plot gene expression, gene scores & motif z-scores:
```#{r, fig.width=10, fig.height=10}
for (n in marker_genes) { print(n) # select one row for a particular gene score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ] # add score for this gene to metadata metadata[paste0(“score_”,n)] <- score_n # select gene expression for a particular gene expr_n <- lognorm[rownames(lognorm) %in% c(n), ] metadata[paste0(“expr_”, n)] <- expr_n
seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] sea_meta[paste0(“seacell_”, n)] <- seacells_n
# if the marker gene is a TF if (length(tfs[grepl(paste0(“^”, n), tfs)]) > 0) {
# select motif z score
motif_n <- motif_mtx[rownames(motif_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
metadata[paste0("motif_", n)] <- motif_n
# create barplots for gene scores, gene expression and motif z-score
plots <- map(c("score_", "expr_", "motif_"), function(p){
df <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
df %>% ggplot() +
geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)),
fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none") +
labs(title = paste0(n), x = "celltype", y = paste0(p))
})
# create scatter plots for gene expression and motif z-score
scatter_plots <- map(seq.int(1), function(s){
df <- metadata %>% group_by(celltypes) %>%
summarise_at(vars(paste0("motif_", n),
paste0("score_", n)),
funs(mean(., na.rm = TRUE)))
df %>%
ggplot() +
geom_smooth(aes(x = df %>% pull(paste0("score_", n)),
y = df %>% pull(paste0("motif_", n))),
formula = y ~ x, method = "lm", size = .1) +
geom_point(aes(x = df %>% pull(paste0("score_", n)),
y = df %>% pull(paste0("motif_", n)),
col = celltypes, size = 1)) +
#labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
theme(legend.position = "none") +
scale_color_manual(values = col) +
labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score")
})
# if the marker gene is no TF
} else {
# create barplots for gene scores, gene expression and motif z-score
plots <- map(c("score_", "expr_"), function(p){
df <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
df %>% ggplot() +
geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)),
fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none") +
labs(title = paste0(n), x = "celltype", y = paste0(p))
})
# create scatter plots for gene expression and gene_score
scatter_plots <- map(seq.int(1), function(s){
df <- metadata %>% group_by(celltypes) %>%
summarise_at(vars(paste0("score_", n),
paste0("expr_", n)),
funs(mean(., na.rm = TRUE)))
df %>%
ggplot() +
geom_smooth(aes(x = df %>% pull(paste0("expr_", n)),
y = df %>% pull(paste0("score_", n))),
formula = y ~ x, method = "lm", size = .1) +
geom_point(aes(x = df %>% pull(paste0("expr_", n)),
y = df %>% pull(paste0("score_", n)),
col = celltypes, size = 1)) +
#labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
theme(legend.position = "none") +
scale_color_manual(values = col)+
labs(title = paste0(n), x = "gene expression", y = "gene activity score")
})
}
do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2)) #
}
```#{r, fig.width=10, fig.height=10}
p1 <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(Gata1), funs(median(., na.rm=TRUE))) %>% ggplot() +
geom_bar(aes(x = celltypes, y = Gata1, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none")
p2 <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(gata1_score_p2g), funs(mean(., na.rm=TRUE))) %>% ggplot() +
geom_bar(aes(x = celltypes, y = gata1_score_p2g, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none")
p3 <- metadata %>% group_by(celltypes) %>%
summarise_at(vars(gata1_z_scores, Gata1), funs(mean(., na.rm = TRUE))) %>%
ggplot() +
geom_smooth(aes(x = Gata1, y = gata1_z_scores), formula = y ~ x, method = "lm", size = .1) +
geom_point(aes(x = Gata1, y = gata1_z_scores, col = celltypes, size = 1)) +
labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
theme(legend.position = "none") +
scale_color_manual(values = col)
p4 <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(gata1_z_scores), funs(mean(., na.rm=TRUE))) %>% ggplot() +
geom_bar(aes(x = celltypes, y = gata1_z_scores, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none")
cowplot::plot_grid(p1, p2, p3, p4)
p1 <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(Sox9), funs(mean(., na.rm=TRUE))) %>% ggplot() +
geom_bar(aes(x = celltypes, y = Sox9, fill = celltypes), stat = "identity") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none") +
scale_fill_manual(values = col)
p2 <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(sox9_z_scores), funs(mean(., na.rm=TRUE))) %>% ggplot() +
geom_bar(aes(x = celltypes, y = sox9_z_scores, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none")
p3 <- metadata %>%
group_by(celltypes) %>%
summarise_at(vars(sox9_score_p2g), funs(mean(., na.rm=TRUE))) %>% ggplot() +
geom_bar(aes(x = celltypes, y = sox9_score_p2g, fill = celltypes), stat = "identity") +
scale_fill_manual(values = col) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none")
p4 <- metadata %>% group_by(celltypes) %>%
summarise_at(vars(sox9_z_scores, Sox9), funs(mean(., na.rm = TRUE))) %>%
ggplot() +
geom_smooth(aes(x = Sox9, y = sox9_z_scores), formula = y ~ x, method = "lm", size = .1) +
geom_point(aes(x = Sox9, y = sox9_z_scores, col = celltypes, size = 1)) +
labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
theme(legend.position = "none") +
scale_color_manual(values = col)
cowplot::plot_grid(p1, p2, p3, p4)